*This program runs a probit and calculates the associated marginal effects
*for stocks, own business and home in the US as a whole
*It's used on the paper on wealth counterfactuals 
*for the final verification for the Review of Economics and Statistics
*ATTENTION: For the program to run, one has to download the package mat2txt from the Internet
*June 2011 - Dimitris Christelis

version 9.2
cap clear all
macro drop _all
clear
set mem 500m
set more off

*Paths
do "c:/foldp/foldp.do"
global projf "${foldp}/Wealth Counterfactuals/Verification"

global wgt wgth

global assetl1 rfin home ownb secdebt liab tdebt
global assetl2 hrfin hhome hownb hsecdebt hliab htdebt
local nar: word count $assetl1


forvalues ll=1/`nar' {

   *Globals of asset
   global as: word `ll' of $assetl1
   global hhas: word `ll' of $assetl2

   *Logging
   cap log close
   log using "${projf}/Programs/HRS/Country/${as}_usall.log", replace

   global pdate "$S_DATE" 
   global ptime "$S_TIME"
   display "log file for ${as} in whole US opened on ${pdate} at ${ptime}"

   *Reading the parameters, variable lists etc.
   do "${projf}/Programs/Parameters.do"

   use "${projf}/Data/HRS/HRS_DG.dta", clear

   *Checking the sample weights
   assert abs(wgth-wgtach)<1e-6
   drop wgtach

   *Keeping only heads
   keep if head==1 & ${wgt}>0 & ${wgt}!=.

   *Total debt
   qui gen double htdebtv = hmortv+hhmlov+hliabv
   qui gen byte htdebto = (htdebtv>tolr)

   *Secured debt, except on 2nd house
   qui ge double hsecdebtv = hmortv + hhmlov /*+ hmort2v*/
   qui ge double hsecdebto = (hsecdebtv>tolr)

   *Ownership dummy
   *qui gen byte ${hhas}o=(${hhas}v>tolr)
   
   *Rescaling age
   qui replace age=age/sc_age
   qui gen double agesq=age^2

   *Creating net worth and income quartiles
   *Net worth, equal total minus the asset
   if "${as}"=="rfin" | "${as}"=="home" | "${as}"=="ownb" {
      qui gen double nw=hnetwv-${hhas}v
   }
   *For debts, we add back instead of substracting
   if ! ("${as}"=="rfin" | "${as}"=="home" | "${as}"=="ownb") {
      qui gen double nw=hnetwv+${hhas}v
   }
   
   *Income, equal to total minus capital income
   qui gen double inc=hgtincv-hcpincv

   *Bad health dummy
   qui gen byte badhs=(srhealtha>=4)

   *College dummy
   qui gen byte college=(edu==3)
   
   *High school dummy
   qui gen byte highs=(edu==2)

   *Couple dummy
   qui gen byte couple=(mstatus==1)
   
   *Widow dummy
   qui gen byte widow=(mstatus==3)
   
   *Never married
   qui gen byte nevmar=(mstatus==4)
      
   *Creating wealth and income quartiles
   qui su nw if head==1 [aw=wgth], detail
   qui gen double wqnw1=(nw<=r(p25))
   qui gen double wqnw2=(nw>r(p25) & nw<=r(p50))
   qui gen double wqnw3=(nw>r(p50) & nw<=r(p75))
   qui gen double wqnw4=(nw>r(p75))
   qui su inc if head==1 [aw=wgth], detail
   qui gen double wqinc1=(inc<=r(p25))
   qui gen double wqinc2=(inc>r(p25) & inc<=r(p50))
   qui gen double wqinc3=(inc>r(p50) & inc<=r(p75))
   qui gen double wqinc4=(inc>r(p75))

   *Saving the dataset
   qui save "${projf}/Results/HRS/Country/${as}/temp.dta", replace

   probit ${hhas}o ${basiccovs}, r

   *Coefficient matrix
   mat olco=e(b)
  
   *Augmenting the coefficient matrix by an empty column, the number of obs and the log likelihood
   mat blank = .
   mat colnames blank=blank
  
   mat olN = e(N)
   mat colnames olN=nobs
  
   mat ill = e(ll)
   mat colnames ill=loglik

   mat olco=[olco,blank,olN,ill]
    
   *Var-Covar matrix
   mat olvar=e(V)

   *Saving matrix of regression results as a text file
  
   *Number of variables
   local nc=colsof(olvar)
   global nc=`nc'
   matrix olout=J(`nc'+3,6,.)
  
   *Putting in the regression coefficients 
   forvalues k=1/`nc' {
      matrix olout[`k',1]=olco[1,`k']
      matrix olout[`k',2]=sqrt(olvar[`k',`k'])
      matrix olout[`k',3]=olco[1,`k']/sqrt(olvar[`k',`k'])
      matrix olout[`k',4]=2*ttail(e(N) - `nc',abs(olout[`k',3]))
      matrix olout[`k',5] = olout[`k',1] - invttail(e(N) - `nc',sigl/2)*olout[`k',2]
      matrix olout[`k',6] = olout[`k',1] + invttail(e(N) - `nc',sigl/2)*olout[`k',2]
   }
  
   *Adding number of observations used in first stage probit
   matrix olout [`nc'+2,1]=e(N)
   *Adding log likelihood
   matrix olout [`nc'+3,1]=e(ll)

   *Labels of matrix of regression output
   *Column labels
   local colmat coeff se tstat pvalue ci_low ci_high
   matrix colnames olout = `colmat' 
   *Row labels
   matrix rownames olout = ${basiccovs} _cons blank nobs loglik
   *Displaying and saving the matrix of results
   mat list olout,format(%15.5f)
   mat2txt, mat(olout) saving("${projf}/Results/HRS/Country/${as}/probit_est_usall_${as}.txt") replace format(%15.5f) ///
     title("Regression for ${as} in the US as a whole")
  
   preserve
   *Saving matrices of coefficients and correlations and their covariance matrices
   *for each implicate
   drop _all
   svmat double olco, names(eqcol)
   qui save "${projf}/Results/HRS/Country/${as}/probit_coeff_usall_${as}.dta", replace
   drop _all
   svmat double olvar, names(eqcol)
   qui save "${projf}/Results/HRS/Country/${as}/probit_var_usall_${as}.dta", replace
   restore  

   *Bootstrap program

   *Creating a matrix with number of columsn equal to 1 + # bootstrap runs, storing
   *the main sample estimates in the first line, together with the proportion of
   *hhds owning the asset
   qui su ${hhas}o if head==1 [aw=$wgt], meanonly
   sca ${as}p=r(mean)
   mat olco=olco[1,1..${nc}]
   mat ${as}coeffall=(olco,${as}p)
   local nch=$nc+1
   local bn ""
   forvalues kk=1/`nch' {
     local bn `bn' b`kk'
   }
   mat colnames ${as}coeffall = `bn'   
   qui save temp2, replace
  
   *Subsequent runs, beginning of the bootstrap loop
   forvalues i=1/$finsim {

     if `i'==1 {
         local sr = 1
     }
     *Augmenting the index that tracks valid bootstrap runs by one
     if `i'>1 {
         local sr = `sr' + 1
     }
        
      use temp2, clear
      *Fixing the seed before each resampling
      local sd=1234+30*`i'
      set seed `sd'
      bsample
      
      *Probit regression in the bootstrapped sample
      if `i'==1 {
        probit ${hhas}o ${basiccovs}, r
      }
      if `i'>1 {
        qui probit ${hhas}o ${basiccovs}, r
      }
      
      mat a=e(V)
      local nc`i'=colsof(a)
            
      *If no variables are dropped from the probit then we proceed with updating the matrix
      if `nc`i''==$nc {
         qui su ${hhas}o if head==1 [aw=$wgt], meanonly
         sca ${as}p=r(mean)
         mat olco=e(b)
         mat ${as}coeff`sr'=(olco,${as}p)
         mat ${as}coeffall=(${as}coeffall \ ${as}coeff`sr')

         forvalues kk=1/10 {
            if `sr'==$nsim*(`kk'/10) {
               noisily di in yellow `sr' in yellow "*" _continue 
            }
         }

         mat drop ${as}coeff`sr'
      }
      
      *If some variables are dropped from the probit in the bootstrapped samples 
      *we put back the index to its previous value
      if `nc`i''~=$nc {
         local sr=`sr'-1
      }
            
      *If the number of valid bootstrap runs is equal to the predetermined
      *one then the loop over bootstrap runs stops
      if `sr'==$nsim {
         di in yellow "End of ${nsim} valid boostrap runs, total runs equal to `i'"
         continue, break
      }
      
   }  /* End of loop over bootstrap runs */

   *Saving the matrix of bootstrap coefficients and sample proportions
   drop _all
   svmat double ${as}coeffall, names(eqcol)
   qui save "${projf}/Results/HRS/Country/${as}/boot_drawcoeff_usall_${as}.dta", replace


***********************************************************
*Calculation of marginal effects and associated magnitudes*
***********************************************************

      *Reading the matrices saved from the estimation part, to retrieve the # of obs
      use "${projf}/Results/HRS/Country/${as}/probit_coeff_usall_${as}.dta", clear
      mkmat _all , mat(a) 
      sca nobs=a[1,${nc}+2]

      *Reading the dataset
      use "${projf}/Results/HRS/Country/${as}/temp.dta", clear

      *Incrementing the continuous variables by the appropriate magnitudes
      *to compute marginal effects and elasticities
      *Variables that are not used in elasticity form
      foreach y in $con_lev {
         qui gen double `y'_pl=`y'+(ch_`y'/sc_`y')
      }   

      merge using "${projf}/Results/HRS/Country/${as}/boot_drawcoeff_usall_${as}.dta"   
      drop _merge   

      *Putting the values of the drawn coefficients equal to the dummy value from missing
      *in rows lower than the number of bootstraps, so that there are no missing values
      local nch = $nc + 1
      forvalues h=1/`nch' {   
         qui replace _b`h'=$dumval if _n>${nsim}+1  
      }      
   
      *Renaming the coefficients  
      local ncl = $nc - 1
      forvalues h=1/`ncl' {   
         local g: word `h' of ${basiccovs}
         rename _b`h' b_`g'   
      }   

      *Renaming the coefficient of the constant 
      *set trace on
      rename _b${nc} b_cons
   
      local nch = $nc + 1
      rename _b`nch' pr_usall_${as}_own
   
      *Loop over bootstrapped coefficients  
      *We have to take into account that the first line of the coefficients has
      *the one from the main estimation, which should not be used for the calculation 
      *of the distribution of marg. effects and their standard errors
      forvalues i=1/$finsim { 

         *Defining an index of valid bootstrap runs that will determine whether the 
         *loop finishes or not
         if `i'==1 {
            local sr = 2
         }
         *Augmenting the index that tracks valid bootstrap runs by one
         if `i'>1 {
            local sr = `sr' + 1
         }
         *If there were any condition on the coefficients that was violated by the current
         *draw then the index of valid bootstrap runs would be decreased by one at this point
         *in the program. For the pooled probit there is no such condition and thus the
         *index of valid bootstrap runs (sr) is equal to the on of total bootstrap runs (i)
   
         *Creating the linear prediction for the current draw of the coefficients
         qui gen double xb0 = b_cons[`i']
         foreach y in $basiccovs {
            qui replace xb0 = xb0 + b_`y'[`i']*`y' 
         }
      
         
         ******************************************************************************************        	 
         ************* 	
         *0/1 Dummies* 	
         ************* 	
    	
         foreach y in $dumlist_meff /* $dumlist */ {           	
  
            *Putting all dummies to zero          	
            *Removing the effect of the dummy.         	
            qui gen double xb_c1 = xb0 - b_`y'[`i']*`y'    	        	
            assert xb_c1~=. if xb0~=.   	        	
	    	        	
            *Putting back the dummy equal to 1 	
            qui gen double xb_c2 = xb_c1 + b_`y'[`i']    	        	
            assert xb_c2~=. if xb0~=.   	        	
    	
            *Loop over the marginal effect  	
            forvalues tt=1/$wcp { 	
               local wpf: word `tt' of ${prfx} 	
     	
      	       if `tt'==1 {    	
                  qui gen double pdif_`y'= normal(xb_c2) - normal(xb_c1) 	
               }    	
    	       if `tt'==2 {             	
	          qui ge double pdif_`y' = (normal(xb_c2) - normal(xb_c1))/normal(xb_c1)           	  
	       }    	
	       assert pdif_`y'~=.    	
  
               qui sum pdif_`y' [aw = $wgt], meanonly                    	
  
   	       if `i'==1 {                   	
	          qui ge double `wpf'_`y' = $dumval                   	
	       }                   	
               *Putting the effect at the appropriate line in the file
               qui replace `wpf'_`y' = r(mean) in `sr'                      	
	       assert `wpf'_`y'<$dumval if _n==`sr' 	
               drop pdif_`y'
	     		         	
            }   /* End of loop over magnitudes */ 	
            drop xb_c1 xb_c2 	
         }  /* End of loop over list of 0/1 dummies */        	
	 
           
         ******************************************************************************************        	 
         ************************ 	
         *Interdependent Dummies* 	
         ************************ 	
         
         foreach g in $groupno_meff  {  
  
            *Putting all dummies in a group equal to zero            
            qui gen double xb_c1 = xb0
            local m ${group`g'}
            foreach y in `m'  {            
               qui replace xb_c1 = xb_c1 - b_`y'[`i']*`y'            
            }	
  
            
            local m ${group`g'}
            foreach y in `m'  {     
  
               *Putting back the dummy equal to 1
     	       qui gen double xb_c2= xb_c1 + b_`y'[`i'] 

               *Loop over the elasticity and the 4 intervals
               forvalues tt=1/$wcp {
                  local wpf: word `tt' of ${prfx}    
          
	          if `tt'==1 {   
                     qui gen double pdif_`y' = normal(xb_c2) - normal(xb_c1)     
	          }   
    	          if `tt'==2 {   
	             qui gen double pdif_`y' = (normal(xb_c2) - normal(xb_c1))/normal(xb_c1)                     
	          }   
  	          assert pdif_`y'~=.   

  	          qui sum pdif_`y' [aw = $wgt], meanonly                     

  	          if `i'==1 {                     
	             qui ge double `wpf'_`y' = $dumval                     
	          }                     
                  qui replace `wpf'_`y' = r(mean) in `sr'                     
	          assert `wpf'_`y'<$dumval if _n==`sr'   
                  drop pdif_`y' 
  
              }  /* End of loop over magnitudes */
               drop xb_c2 

            }  /* End of loop over variables of a given group */       
            drop xb_c1
         }  /* End of loop over groups */       

         ******************************************************************************************        	 
         ********************** 	
         *Continuous variables* 	
         ********************** 	
    	
         foreach y in $conlist_meff  {           	

            qui gen double xb_c1 = xb0 
            assert xb_c1~=. if xb0~=.   	        	
	    	        	
            *Incrementing by the already determined sum	
            qui gen double xb_c2 = xb_c1 + b_`y'[`i']*(`y'_pl - `y')    	        	
            assert xb_c2~=. if xb0~=.   	        	
    	
            *Loop over the marginal effect 	
            forvalues tt=1/$wcp { 	
               local wpf: word `tt' of ${prfx} 	
     	
      	       if `tt'==1 {    	
                    qui gen double pdif_`y'= normal(xb_c2) - normal(xb_c1) 	
               }    	
    	       if `tt'==2 {             	
	          qui ge double pdif_`y' = (normal(xb_c2) - normal(xb_c1))/normal(xb_c1)           	  
	       }    	
	       assert pdif_`y'~=.    	

               qui sum pdif_`y' [aw = $wgt], meanonly                    	

   	       if `i'==1 {                   	
	          qui ge double `wpf'_`y' = $dumval                   	
	       }                   	
               *Putting the effect at the appropriate line in the file
               qui replace `wpf'_`y' = r(mean) in `sr'                      	
	       assert `wpf'_`y'<$dumval if _n==`sr' 	
  	       drop pdif_`y'	         	

            }   /* End of loop over magnitudes */ 	
            drop xb_c1 xb_c2 	
         }  /* End of loop over list of continuous variables */        	
	 
         
         ******************************************************************************************        	 
         *****
         *Age* 	
         *****

         qui gen double xb_c1 = xb0 
         assert xb_c1~=. if xb0~=.   	        	
	    	        	
         *Incrementing by the already determined sum	
         qui gen double xb_c2 = xb_c1 + b_age[`i']*(ch_age/sc_age) + ///
            b_agesq[`i']*(2*age*(ch_age/sc_age) + ((ch_age/sc_age)^2))
         assert xb_c2~=. if xb0~=.   	        	
    	
         *Loop over the marginal effect 	
         forvalues tt=1/$wcp { 	
            local wpf: word `tt' of ${prfx} 	
     	
            if `tt'==1 {    	
                 qui gen double pdif_age= normal(xb_c2) - normal(xb_c1) 	
            }    	
            if `tt'==2 {             	
               qui ge double pdif_age = (normal(xb_c2) - normal(xb_c1))/normal(xb_c1)           	  
            }    	
            assert pdif_age~=.    	

            qui sum pdif_age [aw = $wgt], meanonly                    	

            if `i'==1 {                   	
               qui ge double `wpf'_age = $dumval                   	
            }                   	
            *Putting the effect at the appropriate line in the file
            qui replace `wpf'_age = r(mean) in `sr'                      	
	    assert `wpf'_age<$dumval if _n==`sr' 	
            drop pdif_age	     		         	

         }   /* End of loop over magnitudes */ 	
         drop xb_c1 xb_c2 	

         **********************************************************************************	 

         *Dropping linear prediction based on current draw
         drop xb0 

         forvalues kk=1/10 {
            if `sr'==$nsim*(`kk'/10) {
               di in yellow "Iteration `sr'-1, ${as}"
            }
         }      

         *If the number of valid boostrap runs is equal to the predetermined
         *one then the loop over bootstrap runs stops
         local ncpp=$nsim+1
         if `sr'==`ncpp' {
            di in yellow "End of ${nsim} valid bootstrap runs, total runs equal to `i'"
            continue, break
         }
      
      }  /* End of loop over bootstrap runs */

      *Dropping obs beyond the line of the last valid bootstrap run
      drop if _n>$nsim + 1
   
      *Global of marginal effects, putting first the probabilities of interest
      local me pr_usall_${as}_own
      foreach y in $dumlist_meff $grouplist_meff $conlist_meff age {
         if "`y'"~="agesq" {
            local me `me' me_`y'
         }
      }
      
      *Keeping only the marginal effects 
      keep country `me' 

      *Calculating the variance of the generated average probabilities and marginal effects
      *One has to start from the 2nd line, since the first line gives the magnitudes
      *evaluated at the estimated coefficients
      local ncpp=${nsim}+1
      foreach vr in `me' {

         qui su `vr' in 2/`ncpp' 

         qui gen double se_`vr'=sqrt(r(Var))
      }


      *Calculating the point estimate as the mean of the effects across draws
      *We don't change the point estimate of the ownership proportion since we use the
      *actual one
      foreach vr in `me'  {
         if "`vr'"~="pr_usall_${as}_own" {
            qui su `vr' in 2/`ncpp', meanonly 
            qui replace `vr' = r(mean) in 1
         }
      }

      *Saving the dataset with all the probabilities and marginal effects
      qui save "${projf}/Results/HRS/Country/${as}/boot_alldraweff_usall_${as}.dta", replace
   
      *Saving the dataset with only the mean magnitudes and their standard errors
      qui keep in 1
      qui save "${projf}/Results/HRS/Country/${as}/boot_meaneff_usall_${as}.dta", replace
 
      *Saving matrices of marginal effects as a text file

      foreach rs in $prfx  {
         *Number of elements
         local n_`rs': word count ``rs''
         matrix olout=J(`n_`rs'',6,.)

         forvalues k=1/`n_`rs'' {
            local h: word `k' of ``rs''
            matrix olout[`k',1] = `h'[1]
            matrix olout[`k',2] = se_`h'[1]
            matrix olout[`k',3] = olout[`k',1]/olout[`k',2]
            matrix olout[`k',4] = 2*ttail(nobs - ${nc},abs(olout[`k',3]))
            matrix olout[`k',5] = olout[`k',1] - invttail(nobs - ${nc},sigl/2)*olout[`k',2]
            matrix olout[`k',6] = olout[`k',1] + invttail(nobs - ${nc},sigl/2)*olout[`k',2]
         } 
    
         *Labels of matrix of magnitudes
         *Column labels
         local colmat `rs' se tstat pvalue ci_low ci_high
         matrix colnames olout = `colmat' 
         *Row labels
         matrix rownames olout = ``rs''
         *Displaying and saving the matrix of results
         mat list olout,format(%15.5f)
         mat2txt, mat(olout) saving("${projf}/Results/HRS/Country/${as}/boot_mefftab_usall_${as}.txt") replace format(%15.5f) ///
            title("Meff for ${as} in the US as a whole")

         erase "${projf}/Results/HRS/Country/${as}/temp.dta"
         erase temp2.dta
         
      }  /* End of loop over magnitudes */        

   }  /*End of loop over assets */    

   display "log file for the US as a whole opened on ${pdate} at ${ptime}"
   display "log file for the US as a whole closed on $S_DATE at $S_TIME"
   cap log close
   
   
  



